// Please do not compile this file into a library; add it directly to the main
// project instead. Otherwise the global ctor of MMX_SSE_initialiser is
// sometimes not called.
#include <stdio.h>
#include "raster.h"

#if defined(USE_MMX) || defined(USE_SSE)


/// CPU capability probe with C linkage; its definition is not part of the
/// visible code. The returned mask is tested further down in this file
/// against the values 1 (MMX routines) and 2 (SSE routines).
extern "C" {
unsigned GetFeaturesCPU();
}

// Remove any macro definitions of the plain converter names, so that the
// identifiers used as initialisers for the Conv*dyn pointers below name the
// real functions rather than whatever a macro would expand to.
// NOTE(review): presumably raster.h redirects these names to the *dyn
// pointers when USE_MMX/USE_SSE is set -- confirm against raster.h.
// #undef of a name that is not currently a macro is ignored, so no #ifdef
// guard is required around each directive.
#undef Conv4_8
#undef Conv4_16
#undef Conv4_32
#undef Conv4_64

#undef Conv8_4
#undef Conv8_16
#undef Conv8_32
#undef Conv8_64

// Conv16_4 is used as an initialiser below as well; undefining it here keeps
// the list consistent and is harmless if it was never a macro.
#undef Conv16_4
#undef Conv16_8
#undef Conv16_32
#undef Conv16_64

#undef Conv32_16
#undef Conv32_64


// Run-time selectable converter entry points.
// Every pointer is initialised with the plain implementation of the same
// name (without the "dyn" suffix); the MMX_SSE_initialiser constructor later
// in this file may retarget it to an *_MMX or *_SSE routine.
// NOTE(review): ConvA_B presumably converts A-bit source samples into B-bit
// destination samples, Size1D being the element count -- confirm in raster.h.
// All pointers that take a uint64_t destination exist only when
// uint64_t_defined is set, matching the guarded assignments in the
// constructor.
void (*Conv4_8dyn)(uint8_t *Dest, const uint8_t *Src, unsigned Size1D) = Conv4_8;
void (*Conv4_16dyn)(uint16_t *Dest, const uint8_t *Src, unsigned Size1D) = Conv4_16;
void (*Conv4_32dyn)(uint32_t *Dest, const uint8_t *Src, unsigned Size1D) = Conv4_32;
#if defined(uint64_t_defined)
 void (*Conv4_64dyn)(uint64_t *Dest, const uint8_t *Src, unsigned Size1D) = Conv4_64;
#endif
void (*Conv8_4dyn)(uint8_t *Dest, const uint8_t *Src, unsigned Size1D) = Conv8_4;
void (*Conv8_16dyn)(uint16_t *Dest, const uint8_t *Src, unsigned Size1D) = Conv8_16;
void (*Conv8_32dyn)(uint32_t *Dest, const uint8_t *Src, unsigned Size1D) = Conv8_32;
#if defined(uint64_t_defined)
 void (*Conv8_64dyn)(uint64_t *Dest, const uint8_t *Src, unsigned Size1D) = Conv8_64;
#endif
void (*Conv16_4dyn)(uint8_t *Dest, const uint16_t *Src, unsigned Size1D) = Conv16_4;
void (*Conv16_8dyn)(uint8_t *Dest, const uint16_t *Src, unsigned Size1D) = Conv16_8;
void (*Conv16_32dyn)(uint32_t *Dest, const uint16_t *Src, unsigned Size1D) = Conv16_32;
#if defined(uint64_t_defined)
 void (*Conv16_64dyn)(uint64_t *Dest, const uint16_t *Src, unsigned Size1D) = Conv16_64;
#endif
void (*Conv32_16dyn)(uint16_t *Dest, const uint32_t *Src, unsigned Size1D) = Conv32_16;
#if defined(uint64_t_defined)
 // Uses uint64_t, so it must live under the same guard as the other
 // 64-bit destination pointers.
 void (*Conv32_64dyn)(uint64_t *Dest, const uint32_t *Src, unsigned Size1D) = Conv32_64;
#endif


/// Stateless helper type. Its sole member is a default constructor, defined
/// out of line, which retargets the Conv*dyn function pointers according to
/// the mask returned by GetFeaturesCPU().
class MMX_SSE_initialiser
{
public:
  /// Performs the run-time selection of converter routines.
  MMX_SSE_initialiser();
};


/// Retargets the Conv*dyn function pointers to accelerated routines.
///
/// GetFeaturesCPU() is queried once. Depending on which of USE_MMX / USE_SSE
/// were enabled at compile time:
///  - mask value 1 set -> every pointer is switched to its *_MMX routine,
///  - mask value 2 set -> every pointer is switched to its *_SSE routine.
/// The SSE section runs after the MMX section, so when both mask bits are
/// set the SSE routines are the ones left installed. Pointers with a
/// uint64_t destination are only touched when uint64_t_defined is set.
MMX_SSE_initialiser::MMX_SSE_initialiser(void)
{
  const unsigned CPU_Features = GetFeaturesCPU();

#ifdef USE_MMX
  const bool HasMMX = (CPU_Features & 1) != 0;
  if(HasMMX)
  {
    Conv4_8dyn = Conv4_8_MMX;
    Conv4_16dyn = Conv4_16_MMX;
    Conv4_32dyn = Conv4_32_MMX;
    Conv8_4dyn = Conv8_4_MMX;
    Conv8_16dyn = Conv8_16_MMX;
    Conv8_32dyn = Conv8_32_MMX;
    Conv16_4dyn = Conv16_4_MMX;
    Conv16_8dyn = Conv16_8_MMX;
    Conv16_32dyn = Conv16_32_MMX;
    Conv32_16dyn = Conv32_16_MMX;
#if defined(uint64_t_defined)
    Conv4_64dyn = Conv4_64_MMX;
    Conv8_64dyn = Conv8_64_MMX;
    Conv16_64dyn = Conv16_64_MMX;
    Conv32_64dyn = Conv32_64_MMX;
#endif
  }
#endif

  // SSE is handled last on purpose: it overrides any MMX choice made above.
#ifdef USE_SSE
  const bool HasSSE = (CPU_Features & 2) != 0;
  if(HasSSE)
  {
    Conv4_8dyn = Conv4_8_SSE;
    Conv4_16dyn = Conv4_16_SSE;
    Conv4_32dyn = Conv4_32_SSE;
    Conv8_4dyn = Conv8_4_SSE;
    Conv8_16dyn = Conv8_16_SSE;
    Conv8_32dyn = Conv8_32_SSE;
    Conv16_4dyn = Conv16_4_SSE;
    Conv16_8dyn = Conv16_8_SSE;
    Conv16_32dyn = Conv16_32_SSE;
    Conv32_16dyn = Conv32_16_SSE;
#if defined(uint64_t_defined)
    Conv4_64dyn = Conv4_64_SSE;
    Conv8_64dyn = Conv8_64_SSE;
    Conv16_64dyn = Conv16_64_SSE;
    Conv32_64dyn = Conv32_64_SSE;
#endif
  }
#endif
}


/// File-scope instance: constructing it runs the MMX_SSE_initialiser
/// constructor as part of this translation unit's static initialisation.
MMX_SSE_initialiser MMX_SSE_init;	/// The initialiser ctor should be called before main()


#endif